# Clean APNs for San Diego fires

pacman::p_load(tidyverse, sf, data.table, dplyr, fst, stringdist)

## Start from an empty workspace.
## NOTE(review): this wipes every object in the calling session; running the
## script in a fresh R process would make this line unnecessary.
rm(list = ls())

## Project-level settings. Presumably defines WORKING, the data directory used
## below -- verify against code/globals.R.
source("code/globals.R")

## Damage records for the 2003 and 2007 San Diego fires. The object carries a
## geometry column, which is dropped further down before export.
sd <- readRDS(file.path(WORKING, "damage_sd_03_07.RDS"))

## ZTRAX attribute tables for 2003 and 2007, restricted to FIPS 6073.
attr03 <- read_fst(file.path(WORKING, "ztraxdata", "CAattr2003.fst"), as.data.table = TRUE) %>%
  filter(FIPS == 6073)

attr07 <- read_fst(file.path(WORKING, "ztraxdata", "CAattr2007.fst"), as.data.table = TRUE) %>%
  filter(FIPS == 6073)

## Confirm unique IDs in all datasets (each call returns 0 when no key is
## duplicated, otherwise the index of the first duplicated row).
## The geometry column of an sf object is sticky under `[`, so it is dropped
## first; otherwise two rows sharing a key but not a geometry would never be
## reported as duplicates.
anyDuplicated(st_drop_geometry(sd)[, c("FIPS", "UnformattedAssessorParcelNumber", "INCIDENTNUM")])
anyDuplicated(attr03[, c("ImportParcelID", "BuildingOrImprovementNumber")])
anyDuplicated(attr07[, c("ImportParcelID", "BuildingOrImprovementNumber")])

## Inspect parcel IDs (APNs) for one county by approximate address matching.
##
## A reproducible random sample of fire-damage (DINS) records is drawn for the
## county; for each sampled record the closest ZTRAX street address is looked
## up, and the APNs of the two sources are placed side by side so their
## formats can be compared by eye.
##
## Arguments:
##   targetfips  County FIPS code; rows of both inputs are kept when their
##               FIPS column equals it (rows with a missing FIPS are dropped).
##   dinsfile    Damage records. Needs columns FIPS,
##               UnformattedAssessorParcelNumber, Address, howmanystructures
##               and badrecordflag. Any data.frame-like object works
##               (data.frame, data.table, sf); extra columns such as an sf
##               geometry are ignored.
##   attrfile    ZTRAX attribute records. Needs columns FIPS,
##               UnformattedAssessorParcelNumber and PropertyFullStreetAddress.
##   nsample     Maximum number of damage records to inspect (default 25).
##
## Returns a plain data.frame, one row per sampled damage record, sorted by
## ascending address distance, with columns: stringdist,
## UnformattedAssessorParcelNumber (ZTRAX), UnformattedAPN_DINS,
## PropertyFullStreetAddress (ZTRAX), address_DINS, howmanystructures,
## badrecordflag, lengthUAPN_DINS and lengthUAPN_Ztrax. A damage record with
## no ZTRAX address within distance 100 gets NA in the ZTRAX columns. Empty
## input yields a zero-row data.frame with the same columns.
##
## Side effect: calls set.seed(16473), so the global RNG state is reset on
## every call; this is what makes the sample reproducible.
##
## Relies on amatch() and stringdist() from the stringdist package being
## attached.
inspectAPN <- function(targetfips, dinsfile, attrfile, nsample = 25L) {
  stopifnot(
    is.numeric(nsample), length(nsample) == 1L, !is.na(nsample), nsample >= 0
  )

  dins_needed <- c(
    "FIPS", "UnformattedAssessorParcelNumber", "Address",
    "howmanystructures", "badrecordflag"
  )
  ztrax_needed <- c(
    "FIPS", "UnformattedAssessorParcelNumber", "PropertyFullStreetAddress"
  )
  dins_missing <- setdiff(dins_needed, names(dinsfile))
  if (length(dins_missing) > 0) {
    stop(
      "dinsfile is missing column(s): ", paste(dins_missing, collapse = ", "),
      call. = FALSE
    )
  }
  ztrax_missing <- setdiff(ztrax_needed, names(attrfile))
  if (length(ztrax_missing) > 0) {
    stop(
      "attrfile is missing column(s): ", paste(ztrax_missing, collapse = ", "),
      call. = FALSE
    )
  }

  ## Row positions for the target county. which() drops rows whose FIPS is NA.
  ## Columns are pulled with [[ ]] so the code behaves the same for
  ## data.frame, data.table and sf inputs.
  dins_rows <- which(dinsfile[["FIPS"]] == targetfips)
  ztrax_rows <- which(attrfile[["FIPS"]] == targetfips)

  ## Sort by address before drawing random numbers so that the sample depends
  ## only on the seed and the set of addresses, not on the input row order.
  dins_rows <- dins_rows[order(dinsfile[["Address"]][dins_rows])]

  ## For (up to) nsample random fire records, find closest address in ZTRAX
  set.seed(16473)
  shuffled <- order(runif(length(dins_rows)))
  keep <- dins_rows[shuffled[seq_len(min(length(shuffled), nsample))]]

  address_dins <- dinsfile[["Address"]][keep]
  ztrax_address <- attrfile[["PropertyFullStreetAddress"]][ztrax_rows]
  ztrax_apn <- attrfile[["UnformattedAssessorParcelNumber"]][ztrax_rows]

  ## Both sides are lower-cased before matching, so the nearest neighbour is
  ## chosen under the same case-insensitive distance that is reported below.
  query <- tolower(address_dins)
  best <- amatch(query, tolower(ztrax_address), maxDist = 100, method = "dl")
  matched_address <- ztrax_address[best]

  result <- data.frame(
    stringdist = stringdist(query, tolower(matched_address), method = "dl"),
    UnformattedAssessorParcelNumber = ztrax_apn[best],
    UnformattedAPN_DINS = dinsfile[["UnformattedAssessorParcelNumber"]][keep],
    PropertyFullStreetAddress = matched_address,
    address_DINS = address_dins,
    howmanystructures = dinsfile[["howmanystructures"]][keep],
    badrecordflag = dinsfile[["badrecordflag"]][keep],
    stringsAsFactors = FALSE
  )

  ## Best address matches first; APN lengths make format differences
  ## (padding, missing digits) easy to spot.
  result <- result[order(result$stringdist), ]
  result$lengthUAPN_DINS <- nchar(result$UnformattedAPN_DINS)
  result$lengthUAPN_Ztrax <- nchar(result$UnformattedAssessorParcelNumber)
  result
}

## -- APN inspection, one call per fire: no changes required for either fire
inspectAPN(
  targetfips = 6073,
  dinsfile = sd[sd$INCIDENTNUM == "Paradise2003", ],
  attrfile = attr03
)
inspectAPN(
  targetfips = 6073,
  dinsfile = sd[sd$INCIDENTNUM == "Witch2007", ],
  attrfile = attr07
)

## Per-fire damage tables: keep one incident, drop the geometry column and
## convert to data.table
damage03 <- data.table(st_drop_geometry(sd[sd$INCIDENTNUM == "Paradise2003", ]))
damage07 <- data.table(st_drop_geometry(sd[sd$INCIDENTNUM == "Witch2007", ]))

## Save the damage and attribute tables under WORKING with the "_cleaned"
## suffix
write_fst(x = damage03, path = file.path(WORKING, "damage03_SanDiego_cleaned.fst"))
write_fst(x = damage07, path = file.path(WORKING, "damage07_SanDiego_cleaned.fst"))
write_fst(x = attr03, path = file.path(WORKING, "attr03_SanDiego_cleaned.fst"))
write_fst(x = attr07, path = file.path(WORKING, "attr07_SanDiego_cleaned.fst"))
